* ------------------------------------------------------------------
* LINKING with FT data
* ------------------------------------------------------------------

* Close any log left open by an earlier run, then start a fresh one
capture log close
log using overreporting_log, replace

* Load admin data and DNES, matched one-to-one on pnr
use foranalysis_deskriptiv_all.dta, clear
merge 1:1 pnr using "skemadata_a_ku2_ok.dta"

* --- Recodings ---

* Fold IE category 3 into category 2
recode IE 3=2

* deltog is 1 when a self-reported turnout answer (d1ny) is present, else 0.
* It is built before the recode below, so an answer coded 98 still counts
* as having an answer.
gen deltog = d1ny != .

* Self-reported turnout: 98 becomes missing, 2 becomes 0, 1 becomes 100
recode d1ny 98=. 2=0 1=100

* ==================================================================
* Create weights
* ==================================================================

* pop_mod_sample flags every record with non-missing stemt;
* sample_mod_sample additionally requires a match in the merge.
* The merge indicator is written out in full (_merge) so the script does
* not depend on variable-name abbreviation.
gen pop_mod_sample = stemt != .
gen sample_mod_sample = _merge == 3 & stemt != .

* Age at election in three bands: 0 = not above 29, 1 = above 29 up to 75,
* 2 = above 75. Stata treats a missing value as larger than any number, so
* a missing age is kept missing explicitly instead of landing in band 2.
* Band 2 and missing both receive no weight below, so results are the same.
gen alder_valg_three_pop = (alder_ved_valg > 29) + (alder_ved_valg > 75) ///
	if !missing(alder_ved_valg)

* Tabulate age bands for the population below 76 and for the sample
tab alder_valg_three_pop if alder_valg_three_pop < 2 & pop_mod_sample == 1
tab alder_valg_three_pop if sample_mod_sample == 1 & alder_valg_three_pop < 2

* Weight = inverse of (sample count / population count) within each band.
* NOTE(review): the hard-coded counts presumably come from the two
* tabulations above -- confirm and update them if the data change.
* Band 2 is left without a weight, so those records drop out of every
* weighted command further down.
gen weights = .
replace weights = 1 / (1152 / 604261) if alder_valg_three_pop == 0
replace weights = 1 / (1992 / 2206051) if alder_valg_three_pop == 1

* Check the weighted age distribution in the sample
tab alder_valg_three_pop [aweight = weights] if sample_mod_sample == 1

** Table 1
* table1 is 2 x 5: row 1 holds the mean and row 2 the N reported by tabstat;
* each column is one group, filled in below.

matrix define table1 = J(2, 5, 0)

* Column 1: stemt for everyone aged under 76, unweighted
tabstat stemt if alder_ved_valg < 76, stat(mean N) save
matrix define temp = r(StatTotal)
forvalues r = 1/2 {
	matrix table1[`r', 1] = temp[`r', 1]
}

* Column 2: stemt for matched records, weighted.
* _merge is spelled out rather than abbreviated so the command cannot bind
* to a different variable or fail when abbreviation is switched off.
tabstat stemt [aweight = weights] if _merge == 3, stat(mean N) save
matrix define temp = r(StatTotal)
forvalues r = 1/2 {
	matrix table1[`r', 2] = temp[`r', 1]
}

* Columns 3 and 4: the same statistic split by deltog.
* r(Stat1) is the first deltog group (0) and r(Stat2) the second (1).
tabstat stemt [aweight = weights] if _merge == 3, ///
	by(deltog) stat(mean N) save
matrix define temp1 = r(Stat1)
matrix define temp2 = r(Stat2)
forvalues r = 1/2 {
	matrix table1[`r', 3] = temp1[`r', 1]
	matrix table1[`r', 4] = temp2[`r', 1]
}

* Column 5: self-reported turnout (d1ny, on the 0/100 scale at this point)
* among records with non-missing stemt, weighted
tabstat d1ny [aweight = weights] if stemt != ., stat(mean N) save
matrix define temp = r(StatTotal)
forvalues r = 1/2 {
	matrix table1[`r', 5] = temp[`r', 1]
}
* Same figure as a frequency table, for the log
tab d1ny [aweight = weights] if stemt != .

* Write the matrix to a tab-delimited file without disturbing the data
preserve
clear
svmat table1
export delimited table1.txt, delimiter(tab) replace
restore

** Descriptive stats

* Recodings for descriptive stats

* One 0/1 dummy per education level 1-5, left missing where hfaudd_h is
* system missing. The creation order matters: later commands pick these
* variables up through the edu_* wildcard.
* NOTE(review): hfaudd_h may be an abbreviation of hfaudd_hovedgruppe,
* which the models use -- confirm.
local level = 0
foreach grp in elementary highschool vocational mid_short long {
	local ++level
	gen edu_`grp' = hfaudd_h == `level' if hfaudd_h != .
}

* 0/1 indicators for koen == 2 and IE_TYPE == 1, missing where the source
* variable is system missing
gen female = koen == 2 if koen != .
gen native = IE_TYPE == 1 if IE_TYPE != .

* Descriptive statistics for Table 3
*
* table3 is 24 x 6. Rows are three stacks of 8 variables each:
*   rows  1-8  = all groups combined (r(StatTotal))
*   rows  9-16 = second turnout_missing group (r(Stat2))
*   rows 17-24 = first turnout_missing group (r(Stat1))
* Columns 1-3 are mean, sd and n from the first tabstat (weighted,
* deltog == 1); columns 4-6 are the same from the second tabstat
* (unweighted, statsb == 5100 and age below 76).
* Assumes edu_* matches exactly five variables, giving 8 in total.
matrix define table3 = J(24, 6, 0)

tabstat alder_ved_valg edu_* female native [aweight = weights] if deltog == 1, ///
	stat(mean sd n) format(%6.0g) by(turnout_missing) save
matrix define t3_all    = r(StatTotal)
matrix define t3_first  = r(Stat1)
matrix define t3_second = r(Stat2)

forvalues v = 1/8 {
	forvalues s = 1/3 {
		matrix table3[`v', `s']      = t3_all[`s', `v']
		matrix table3[`v' + 8, `s']  = t3_second[`s', `v']
		matrix table3[`v' + 16, `s'] = t3_first[`s', `v']
	}
}

tabstat alder_ved_valg edu_* female native ///
	if statsb == 5100 & alder_ved_valg < 76, ///
	stat(mean sd n) by(turnout_missing) save
matrix define t3_all    = r(StatTotal)
matrix define t3_first  = r(Stat1)
matrix define t3_second = r(Stat2)

forvalues v = 1/8 {
	forvalues s = 1/3 {
		matrix table3[`v', `s' + 3]      = t3_all[`s', `v']
		matrix table3[`v' + 8, `s' + 3]  = t3_second[`s', `v']
		matrix table3[`v' + 16, `s' + 3] = t3_first[`s', `v']
	}
}

* Write the matrix to a tab-delimited file without disturbing the data
preserve
clear
svmat table3
export delimited table3.txt, delimiter(tab) replace
restore

* Recode self-reported turnout to 0/1 instead of 0/100 (needed for logit)
recode d1ny 100 = 1

** Table 2
* Four logit models with the same covariates; average marginal effects are
* stored and written to figure1_data.txt.

* Shared covariate list. Variable names are written out in full
* (alder_ved_valg, _merge) so every model uses the same variables and the
* script does not depend on variable-name abbreviation.
local rhs alder_ved_valg i.hfaudd_hovedgruppe i.koen i.IE

* Model 1 -- population model: everyone aged under 76, unweighted
logit stemt `rhs' if alder_ved_valg < 76
margins, dydx(*) post
estimates store q0

* Model 2 -- sampling frame model: matched records, weighted
logit stemt `rhs' [pweight = weights] if _merge == 3
margins, dydx(*) post
estimates store q1

* Model 3 -- respondents model: matched records with a self-report
logit stemt `rhs' [pweight = weights] if _merge == 3 & d1ny != .
margins, dydx(*) post
estimates store q2

* Model 4 -- self-reported model: outcome is d1ny, matched records with
* non-missing stemt
logit d1ny `rhs' [pweight = weights] if _merge == 3 & stemt != .
margins, dydx(*) post
estimates store q3

esttab q1 q2 q3 q0 using figure1_data.txt, ///
	replace b(a4) se(a4) nostar nogaps nobase nopar nolines nonum noobs nonotes

** Predicting survey response: table 2
* Outcome is deltog (1 = a self-report is present). Both models are
* weighted and restricted to matched records; the first also requires
* non-missing stemt.

* Full variable names (alder_ved_valg, _merge) are used so the script does
* not depend on variable-name abbreviation.
local rhs alder_ved_valg i.hfaudd_hovedgruppe i.koen i.IE

logit deltog `rhs' [pweight = weights] if stemt != . & _merge == 3
margins, dydx(*) post
estimates store q4a

logit deltog `rhs' [pweight = weights] if _merge == 3
margins, dydx(*) post
estimates store q4b

esttab q4a q4b using marginal_responded_weight.txt, ///
	replace b(a4) se(a4) nostar nogaps nobase nopar nolines nonum noobs nonotes


************************************************
************************************************
** Supporting analyses
************************************************
************************************************

* Descriptive statistics for Table S1
* NOTE(review): the matrix and the output file are both named tableS3
* although this section is labelled Table S1 -- confirm which is intended.
* The names are left as they are so the output file does not change.
*
* tableS3 is 8 x 9: one row per variable; columns 1-3 are mean, sd and n
* for all groups combined, columns 4-6 for the first deltog group
* (r(Stat1), deltog == 0) and columns 7-9 for the second (r(Stat2)).
* Assumes edu_* matches exactly five variables, giving 8 in total.

matrix define tableS3 = J(8, 9, 0)

* _merge is spelled out rather than abbreviated so the command does not
* depend on variable-name abbreviation.
tabstat alder_ved_valg edu_* female native [aweight = weights] ///
	if _merge == 3 & stemt != ., stat(mean sd n) by(deltog) save

matrix define temp  = r(StatTotal)
matrix define temp1 = r(Stat1)
matrix define temp2 = r(Stat2)

forvalues i = 1/8 {
	forvalues j = 1/3 {
		matrix tableS3[`i', `j']     = temp[`j', `i']
		matrix tableS3[`i', `j' + 3] = temp1[`j', `i']
		matrix tableS3[`i', `j' + 6] = temp2[`j', `i']
	}
}

* Write the matrix to a tab-delimited file without disturbing the data
preserve
clear
svmat tableS3
export delimited tableS3.txt, replace delimiter(tab)
restore

** Predicting turnout among non-respondents: Table S2
* Weighted logit of stemt on the standard covariates, restricted to matched
* records with no self-report (d1ny missing).

* Full variable names (alder_ved_valg, _merge) are used so the script does
* not depend on variable-name abbreviation.
local rhs alder_ved_valg i.hfaudd_hovedgruppe i.koen i.IE

logit stemt `rhs' [pweight = weights] if d1ny == . & _merge == 3
margins, dydx(*) post
estimates store q5

esttab q5 using marginal_nonrespondents_weight.txt, ///
	replace b(a4) se(a4) nostar nogaps nobase nopar nolines nonum noobs nonotes


** Predicting overreporting among respondents: Table S3
* Weighted logit of the self-report (d1ny) among records where stemt is 0;
* average marginal effects are stored as q15 and exported.

local covars alder_ved_valg i.hfaudd_hovedgruppe i.koen i.IE

logit d1ny `covars' [pweight = weights] if stemt == 0
margins, dydx(*) post
estimates store q15

esttab q15 using marginal_overreport_weight.txt, ///
	replace b(a4) se(a4) nostar nogaps nobase nopar nolines nonum noobs nonotes

** Predicted self-reported turnout for respondents in all municipalities,
** and split by having validated turnout or not
** Model estimates for Figure S1

local covars alder_ved_valg i.hfaudd_hovedgruppe i.koen i.IE

* All voters: no sample restriction beyond what the model variables impose
logit d1ny `covars' [pweight = weights]
margins, dydx(*) post
estimates store q9

* Only records with turnout_missing == 1
logit d1ny `covars' [pweight = weights] if turnout_missing == 1
margins, dydx(*) post
estimates store q10

* q3 is the self-reported model stored earlier in this script
esttab q9 q10 q3 using figures1_data.txt, ///
	replace b(a4) se(a4) nostar nogaps nobase nopar nolines nonum noobs nonotes


** Predicting turnout in 2013 for voters with/without validated turnout
** conditional on responding to 2015 survey
** Model estimates for Figure S2

* Move the current turnout variable and merge indicator out of the way so
* the second file can bring in its own stemt and a new _merge can be made.
* _mFV keeps the match status from the first merge.
rename stemt stemtFV
rename _merge _mFV
merge 1:1 pnr using "30april_to_dst_final_11_feb_2014"
keep if _merge == 3

* Guard: the models below must use stemt from the file just merged in.
* Without a variable named exactly stemt, the name would resolve by
* abbreviation to stemtFV and the models would silently run on the wrong
* outcome, so stop here instead.
confirm variable stemt, exact

* Full variable name alder_ved_valg is used so the script does not depend
* on variable-name abbreviation.
local rhs alder_ved_valg i.hfaudd_hovedgruppe i.koen i.IE

* 2013 turnout, all matched records (unweighted)
logit stemt `rhs' if _mFV == 3
margins, dydx(*) post
estimates store q11

* 2013 turnout, survey respondents only (deltog == 1)
logit stemt `rhs' [pweight = weights] if deltog == 1 & _mFV == 3
margins, dydx(*) post
estimates store q12

* 2013 turnout, records with validated 2015 turnout (turnout_missing == 0)
logit stemt `rhs' [pweight = weights] if turnout_missing == 0 & _mFV == 3
margins, dydx(*) post
estimates store q13

* 2013 turnout, validated 2015 turnout and survey respondent
logit stemt `rhs' [pweight = weights] ///
	if turnout_missing == 0 & deltog == 1 & _mFV == 3
margins, dydx(*) post
estimates store q14

esttab q11 q12 q13 q14 using figures2_data.txt, ///
	replace b(a4) se(a4) nostar nogaps nobase nopar nolines nonum noobs nonotes

capture log close


